# Point R at the folder holding the example CSV (replace the placeholder path).
setwd(dir = "<your directory or folder path>")

# Load the example data set into df1.
df1 <- read.csv(file = "20191114 Example 16.1 Multivariate Data.csv")

# Put the columns of df1 on the search path so they can be used by bare name.
attach(what = df1)
# the dataset has 11 columns:
# Group, X0-X9
library(MASS)
# ---- k-means: elbow plot and cluster assignment ----
# max.clusters : largest number of clusters tried for the elbow plot
# n.clusters   : number of clusters used for the cluster IDs kept in clusid;
#                the MANOVA comments and the cluster plot further down assume 3
max.clusters <- 5
n.clusters <- 3

# Matrix of just the response variables X0-X9 (a numeric matrix, not a
# data frame), taken from df1 by column name.
dfcluster <- as.matrix(df1[, paste0("X", 0:9)])

# wss[k] holds the total within-cluster sum of squares for k clusters.
wss <- numeric(max.clusters)
# With a single cluster the within-cluster SS is the total SS about the
# column means, so no kmeans fit is needed for k = 1.
wss[1] <- sum(scale(dfcluster, center = TRUE, scale = FALSE)^2)
for (k in 2:max.clusters) {
  clus1 <- kmeans(dfcluster, centers = k, algorithm = "Hartigan-Wong")
  wss[k] <- clus1$tot.withinss
}

# Elbow plot for k = 2..max.clusters, with the real k on the x axis.
wss2 <- wss[2:max.clusters]
no.points <- max.clusters - 1
plot(
  x = 2:max.clusters, y = wss2, type = "b",
  xlab = "Number of clusters (k)",
  ylab = "Total within-cluster sum of squares"
)

# Fit the clustering that is actually used downstream. Without this refit,
# clus1 would be the last loop iteration (k = max.clusters), not n.clusters.
# NOTE: kmeans uses random starting centres; call set.seed() beforehand if
# reproducible cluster labels are needed.
clus1 <- kmeans(dfcluster, centers = n.clusters, algorithm = "Hartigan-Wong")
clusid <- clus1$cluster # cluster ID computed by kmeans for each row

df2 <- cbind(df1, clusid) # glue the cluster ID column onto the original data
#
# Model-based clustering with mclust: fit mixture models with 1 to 5
# components to the response matrix, then draw the package's diagnostic plots.
library(mclust)
cluster.detect <- Mclust(data = dfcluster, G = seq_len(5L))
plot(cluster.detect)

# Principal components are computed only for the variables held in dfcluster.
# cor = TRUE bases the decomposition on the correlation matrix;
# scores = TRUE keeps the per-row component scores.
pcomp <- princomp(x = dfcluster, cor = TRUE, scores = TRUE)

# Append the component scores to the data that already carries the cluster IDs.
df3 <- cbind(df2, pcomp$scores)
#
#
# The following call creates a manova object called "man".
# Its summary tells you whether the factor clusid significantly affects
# the multivariate response formed by the first three principal components
# (Comp.1, Comp.2, Comp.3).
#
# The multivariate response matrix is built with cbind(), which sticks the
# three score columns together; any set of numeric columns can be combined
# the same way.
#
# The cluster ID is coerced into a factor with discrete levels so that it is
# treated as a grouping variable rather than as a number. Columns are looked
# up in df3 through the data argument, so nothing needs to be attached.
#
man <- manova(cbind(Comp.1, Comp.2, Comp.3) ~ factor(clusid), data = df3)
summary(man)

# The columns of pcomp$scores are labeled Comp.1, Comp.2, ..., Comp.k,
# where k is the number of variables in the data passed to princomp.

# Swap which data frame is on the search path: remove df1 and attach df3 so
# that Comp.1, Comp.2 and clusid can be referred to by bare name below.
# df3 has not been attached before this point, so it must not be detached
# first (detaching an object that is not attached is an error).
detach(df1)
attach(df3)

# Scatter plot of the first two principal components, one symbol per cluster.
# Clusters 1, 2 and 3 are drawn.

# Row positions belonging to each cluster
rows_1 <- which(clusid == 1)
rows_2 <- which(clusid == 2)
rows_3 <- which(clusid == 3)

# Component scores split by cluster
comp1_1 <- Comp.1[rows_1]
comp2_1 <- Comp.2[rows_1]
comp1_2 <- Comp.1[rows_2]
comp2_2 <- Comp.2[rows_2]
comp1_3 <- Comp.1[rows_3]
comp2_3 <- Comp.2[rows_3]

# Start the plot with cluster 2 (Comp.2 on the x axis, Comp.1 on the y axis).
# pch selects the plotting symbol; xlim / ylim fix the axis ranges so that
# the points added afterwards share the same frame.
plot(
  x = comp2_2, y = comp1_2,
  pch = 21,
  xlim = c(-2.5, 2.5), ylim = c(-6, 6),
  xlab = "pc2", ylab = "pc1",
  main = "Clusters"
)

# Overlay the remaining clusters, each with its own symbol
points(x = comp2_1, y = comp1_1, pch = 22)
points(x = comp2_3, y = comp1_3, pch = 18)

# Legend: the pch codes are listed in the same order as the cluster names
legend(
  x = 0.7, y = -2,
  legend = c("Cluster1", "Cluster2", "Cluster3"),
  pch = c(22, 21, 18)
)

#this is the end of the plotting code
#
# Linear discriminant analysis (lda), using the k-means clusters as the
# grouping variable. lda() comes from the MASS package, loaded at the top of
# the script.
#
# The bare column names from df3 are no longer needed, so take it off the
# search path.
detach(df3)

# Fit the discriminant functions on the same response matrix that was
# clustered (dfcluster, i.e. X0-X9). Passing the whole of df1 would also feed
# the Group column to lda() as a predictor.
# df2$clusid is the column of cluster IDs from kmeans.
disc1 <- lda(dfcluster, grouping = df2$clusid)

# Classify the training rows with the fitted discriminant functions
pred1 <- predict(disc1)

# Keep just the predicted class labels
ldaclass <- pred1$class

# Data frame with the original data, the cluster IDs and the classes from lda
df4 <- cbind(df2, ldaclass)

# Cross-tabulate k-means cluster IDs (rows) against lda classes (columns)
table(clusid = df2$clusid, ldaclass = ldaclass)

# Export the combined data frame as a CSV file that Excel can open.
# The file is written to the current working directory (the setwd folder).
write.csv(x = df4, file = "out_data_with_clus_&_class.csv")
